Hi, I was working on the CartPole problem from OpenAI Gym following your tutorial, and I was converting your abstracted TFLearn code to plain TensorFlow code, following all your tutorials. However, it seems that the weights are not converging at all. The model is similar to the one in the tutorial. I have also applied regularization on top of your code to avoid any overfitting, but the network always seems to choose one action in favor of the other. Please help me out.
[/ import numpy as np import tensorflow as tf import gym import os import random
def neural_network(x, keep_prob=0.8):
    """Build the training-time forward pass and return the output logits.

    Five hidden layers are applied in order; each is
    ``relu(x @ weights[i] + biases[i])`` followed by dropout. The sixth
    layer is affine only (no activation), so the result is suitable as the
    ``logits`` argument of a softmax cross-entropy loss.

    Args:
        x: Input tensor fed to the first layer.
        keep_prob: Value passed as the second positional argument of
            ``tf.nn.dropout`` after every hidden layer. Defaults to 0.8,
            the value that was previously hard-coded.
            NOTE(review): under the TF 1.x API this file appears to use
            (``tf.Session``), that argument is the keep probability;
            newer releases interpret it as a drop rate -- confirm against
            the installed version.

    Returns:
        The un-normalised output tensor of layer 6.
    """
    activations = x
    # Layers 1..5 share the same shape: affine -> ReLU -> dropout.
    # `weights` and `biases` are module-level containers indexed by layer
    # number.
    for layer in range(1, 6):
        pre_activation = tf.add(
            tf.matmul(activations, weights[layer]), biases[layer]
        )
        activations = tf.nn.dropout(tf.nn.relu(pre_activation), keep_prob)
    # Output layer: no activation and no dropout.
    return tf.add(tf.matmul(activations, weights[6]), biases[6])
def test_nn(x):
    """Build the inference-time forward pass and return softmax outputs.

    Applies ``relu(x @ weights[i] + biases[i])`` for layers 1 through 5
    with no dropout, then a softmax over the affine output of layer 6.
    The module-level ``weights`` and ``biases`` containers are read by
    layer number.

    Args:
        x: Input tensor fed to the first layer.

    Returns:
        The softmax-normalised output tensor of layer 6.
    """
    hidden = x
    for idx in (1, 2, 3, 4, 5):
        affine = tf.add(tf.matmul(hidden, weights[idx]), biases[idx])
        hidden = tf.nn.relu(affine)
    output_affine = tf.add(tf.matmul(hidden, weights[6]), biases[6])
    return tf.nn.softmax(output_affine)
def train_nn():
    """Train the network on ``train_x``/``train_y``, then play CartPole with it.

    Steps:
      1. Build the training loss: mean softmax cross-entropy on the output
         of ``neural_network(x)`` plus 0.01 times the summed L2 loss of
         ``weights[1]`` .. ``weights[6]``, minimised with Adam.
      2. Build the evaluation graph from ``test_nn(x)``: an accuracy op and
         a single arg-max "predicted action" op.
      3. Run 5 epochs over the training data in mini-batches of 100,
         printing the summed batch loss per epoch and finally the accuracy
         on the full training set.
      4. Play 10 games of at most 500 steps each in ``env``, choosing each
         action from the network given the previous observation.

    Relies on module-level names defined elsewhere in the file: the
    placeholders ``x`` and ``y``, the data ``train_x`` and ``train_y``,
    the ``weights`` container and the Gym environment ``env``.
    """
    # ---- training graph -------------------------------------------------
    prediction = neural_network(x)
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y)
    )
    l2_penalty = tf.add_n([tf.nn.l2_loss(weights[k]) for k in range(1, 7)])
    # Both terms are already scalars, so no further reduction is needed.
    loss = cross_entropy + 0.01 * l2_penalty
    optimizer = tf.train.AdamOptimizer().minimize(loss)

    # ---- evaluation graph -----------------------------------------------
    test_pred = test_nn(x)
    correct = tf.equal(tf.argmax(test_pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))
    # Build the action op ONCE. Creating tf.argmax inside the game loop
    # would add a new node to the graph on every environment step, making
    # the graph grow without bound and each sess.run slower.
    predicted_action = tf.argmax(test_pred, 1)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        epoches = 5
        batch_size = 100
        for j in range(epoches):
            ep_loss = 0
            for i in range(0, len(train_x), batch_size):
                # Slicing clamps at the end of the sequence, so the last
                # (possibly short) batch needs no explicit bound.
                epoch_x = train_x[i:i + batch_size]
                epoch_y = train_y[i:i + batch_size]
                _, c = sess.run(
                    [optimizer, loss], feed_dict={x: epoch_x, y: epoch_y}
                )
                ep_loss += c
            print("epoch {0} completed out of {1} with loss {2}".format(j, epoches, ep_loss))
        print("Accuracy is {0}".format(sess.run(accuracy, feed_dict={x: train_x, y: train_y})))

        # ---- play the game with the trained network ----------------------
        scores = []
        choices = []
        for each_game in range(10):
            print("game ", each_game)
            score = 0
            game_memory = []
            prev_obs = []
            env.reset()
            for _ in range(500):
                env.render()
                # prev_obs becomes an array after the first step, so test
                # its length rather than its truthiness.
                if len(prev_obs) == 0:
                    # No observation recorded yet: take a random first action.
                    action = random.randrange(0, 2)
                else:
                    obs_batch = np.array([prev_obs]).reshape(-1, 4)
                    action = sess.run(
                        predicted_action, feed_dict={x: obs_batch}
                    )[0]

                choices.append(action)
                new_observation, reward, done, info = env.step(action)
                prev_obs = new_observation
                game_memory.append([new_observation, action])
                score += reward
                if done:
                    break